import scanpy as sc
import pandas as pd
import numpy as np
import os
import celltypist
from scipy.sparse import csr_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import pickle as pkl
from pydeseq2.dds import DeseqDataSet
from pydeseq2.default_inference import DefaultInference
from pydeseq2.ds import DeseqStats
import time
import gseapy as gp
from gseapy import Biomart
from sanbomics.plots import volcano
import networkx as nx
from bioinfokit import analys, visuz
import pegasus as pg
2024-09-13 17:07:27.934666: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
Loading BAL file
read_file2 = '/home/jana/Updated_SCANPY_QC_filtered_BAL_for_Sarcoidosis.h5ad'
bal=sc.read_h5ad(read_file2)
bal
AnnData object with n_obs × n_vars = 74334 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
Ensuring Normalization and Logarithmize the data matrix
sc.pp.normalize_total(bal, target_sum=1e4)
sc.pp.log1p(bal)
bal
AnnData object with n_obs × n_vars = 74334 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
BAL counts
print(bal.obs['type'].value_counts().sort_index())
type Healthy 47547 Sarcoidosis 26787 Name: count, dtype: int64
print(bal.obs['sample'].value_counts().sort_index())
sample BAL-Sarc-1 11007 BAL-Sarc-2 11236 BAL-Sarc-3 4544 BAL-healthy-1 5603 BAL-healthy-2 5705 BAL-healthy-3 4331 BAL-healthy-4 4386 BAL-healthy-5 5220 BAL-healthy-6 4726 BAL-healthy-7 5327 BAL-healthy-8 6102 BAL-healthy-9 2434 BAL-healthy-10 3713 Name: count, dtype: int64
|
cluster0: 'Alveolar macrophages' cluster1: 'CD16- NK cells' cluster2: 'Classical monocytes' cluster3: 'DC1' cluster4: 'DC2' cluster5: 'Macrophages' cluster6: 'Memory B cells' |
cluster7: 'Migratory DCs' cluster8: 'Plasmablasts' cluster9: 'Regulatory T cells' cluster10: 'Tcm/Naive helper T cells' cluster11: 'Tem/Effector helper T cells' cluster12: 'Tem/Temra cytotoxic T cells' cluster13: 'Tem/Trm cytotoxic T cells' |
# Define the clusters and conditions
bal_clusters = [
'Alveolar macrophages','CD16- NK cells','Classical monocytes','DC1','DC2','Macrophages',
'Memory B cells','Migratory DCs','Plasmablasts',
'Regulatory T cells','Tcm/Naive helper T cells','Tem/Effector helper T cells',
'Tem/Temra cytotoxic T cells','Tem/Trm cytotoxic T cells'
]
bal_conditions = [
'BAL-Sarc-1', 'BAL-Sarc-2', 'BAL-Sarc-3',
'BAL-healthy-1', 'BAL-healthy-2', 'BAL-healthy-3', 'BAL-healthy-4',
'BAL-healthy-5', 'BAL-healthy-6', 'BAL-healthy-7', 'BAL-healthy-8',
'BAL-healthy-9', 'BAL-healthy-10'
]
bal_conditions_type = [
'Healthy', 'Sarcoidosis'
]
# Create a dictionary to hold the clusters
bal_celltype= {}
# Initialize an empty list to store the data
data3 = []
data4 = []
# Process each cluster
for i, cluster_id in enumerate(bal_clusters):
bal_celltype[f'cluster{i}'] = bal[bal.obs['celltypist_cell_label_fine'] == cluster_id]
#rank gene
sc.tl.rank_genes_groups(bal_celltype[f'cluster{i}'], groupby='type', use_raw=False, method='wilcoxon', groups=['Sarcoidosis'], reference='Healthy', key_added='wilcoxon_cp_with_Sarcoidosis')
sc.tl.rank_genes_groups(bal_celltype[f'cluster{i}'], groupby='type', use_raw=False, method='wilcoxon', groups=['Healthy'], reference='Sarcoidosis', key_added='wilcoxon_cp_with_Healthy')
# Get counts for each condition
bal_condition_counts = bal_celltype[f'cluster{i}'].obs['sample'].value_counts()
bal_condition_type_counts = bal_celltype[f'cluster{i}'].obs['type'].value_counts()
# Create a dictionary for the row data with default 0 values
row_data3 = {'celltypist_cell_label_fine': cluster_id}
for condition in bal_conditions:
row_data3[condition] = bal_condition_counts.get(condition, 0)
## Create a dictionary for the row data with default 0 values
row_data4 = {'celltypist_cell_label_fine': cluster_id}
for condition in bal_conditions_type:
row_data4[condition] = bal_condition_type_counts.get(condition, 0)
# Append row data to the list
data3.append(row_data3)
data4.append(row_data4)
#data2.append(row_data2)
# Convert the list to a DataFrame
df3 = pd.DataFrame(data3)
#display(df)
df4 = pd.DataFrame(data4)
#display(df_condition)
df_merged_bal = pd.concat([df4, df3],axis=1, join="inner")
display(df_merged_bal)
####
| celltypist_cell_label_fine | Healthy | Sarcoidosis | celltypist_cell_label_fine | BAL-Sarc-1 | BAL-Sarc-2 | BAL-Sarc-3 | BAL-healthy-1 | BAL-healthy-2 | BAL-healthy-3 | BAL-healthy-4 | BAL-healthy-5 | BAL-healthy-6 | BAL-healthy-7 | BAL-healthy-8 | BAL-healthy-9 | BAL-healthy-10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alveolar macrophages | 43008 | 23642 | Alveolar macrophages | 9983 | 10288 | 3371 | 5127 | 5312 | 3975 | 3862 | 5002 | 3813 | 4835 | 5411 | 2298 | 3373 |
| 1 | CD16- NK cells | 221 | 229 | CD16- NK cells | 34 | 99 | 96 | 33 | 15 | 13 | 20 | 10 | 46 | 30 | 34 | 5 | 15 |
| 2 | Classical monocytes | 4 | 22 | Classical monocytes | 1 | 1 | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 |
| 3 | DC1 | 83 | 73 | DC1 | 30 | 27 | 16 | 7 | 7 | 8 | 10 | 3 | 7 | 14 | 10 | 5 | 12 |
| 4 | DC2 | 215 | 113 | DC2 | 43 | 40 | 30 | 13 | 23 | 19 | 9 | 16 | 40 | 17 | 32 | 18 | 28 |
| 5 | Macrophages | 247 | 77 | Macrophages | 47 | 18 | 12 | 24 | 33 | 15 | 16 | 32 | 25 | 21 | 39 | 13 | 29 |
| 6 | Memory B cells | 188 | 161 | Memory B cells | 40 | 48 | 73 | 61 | 19 | 20 | 7 | 18 | 29 | 11 | 10 | 10 | 3 |
| 7 | Migratory DCs | 74 | 75 | Migratory DCs | 22 | 34 | 19 | 8 | 7 | 7 | 5 | 6 | 9 | 6 | 15 | 3 | 8 |
| 8 | Plasmablasts | 28 | 23 | Plasmablasts | 7 | 16 | 0 | 4 | 6 | 3 | 1 | 3 | 1 | 2 | 7 | 1 | 0 |
| 9 | Regulatory T cells | 377 | 283 | Regulatory T cells | 96 | 79 | 108 | 63 | 30 | 33 | 44 | 12 | 81 | 34 | 45 | 7 | 28 |
| 10 | Tcm/Naive helper T cells | 328 | 143 | Tcm/Naive helper T cells | 68 | 17 | 58 | 29 | 22 | 18 | 38 | 14 | 92 | 47 | 40 | 9 | 19 |
| 11 | Tem/Effector helper T cells | 1221 | 1060 | Tem/Effector helper T cells | 344 | 290 | 426 | 117 | 85 | 85 | 180 | 27 | 240 | 132 | 252 | 27 | 76 |
| 12 | Tem/Temra cytotoxic T cells | 768 | 483 | Tem/Temra cytotoxic T cells | 153 | 131 | 199 | 52 | 53 | 48 | 104 | 38 | 191 | 86 | 129 | 16 | 51 |
| 13 | Tem/Trm cytotoxic T cells | 785 | 403 | Tem/Trm cytotoxic T cells | 139 | 148 | 116 | 65 | 93 | 87 | 90 | 39 | 152 | 92 | 74 | 22 | 71 |
Merging dataframes
df_merged_bal1=df4.set_index('celltypist_cell_label_fine').join(df3.set_index('celltypist_cell_label_fine'))
display(df_merged_bal1)
| Healthy | Sarcoidosis | BAL-Sarc-1 | BAL-Sarc-2 | BAL-Sarc-3 | BAL-healthy-1 | BAL-healthy-2 | BAL-healthy-3 | BAL-healthy-4 | BAL-healthy-5 | BAL-healthy-6 | BAL-healthy-7 | BAL-healthy-8 | BAL-healthy-9 | BAL-healthy-10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| celltypist_cell_label_fine | |||||||||||||||
| Alveolar macrophages | 43008 | 23642 | 9983 | 10288 | 3371 | 5127 | 5312 | 3975 | 3862 | 5002 | 3813 | 4835 | 5411 | 2298 | 3373 |
| CD16- NK cells | 221 | 229 | 34 | 99 | 96 | 33 | 15 | 13 | 20 | 10 | 46 | 30 | 34 | 5 | 15 |
| Classical monocytes | 4 | 22 | 1 | 1 | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 |
| DC1 | 83 | 73 | 30 | 27 | 16 | 7 | 7 | 8 | 10 | 3 | 7 | 14 | 10 | 5 | 12 |
| DC2 | 215 | 113 | 43 | 40 | 30 | 13 | 23 | 19 | 9 | 16 | 40 | 17 | 32 | 18 | 28 |
| Macrophages | 247 | 77 | 47 | 18 | 12 | 24 | 33 | 15 | 16 | 32 | 25 | 21 | 39 | 13 | 29 |
| Memory B cells | 188 | 161 | 40 | 48 | 73 | 61 | 19 | 20 | 7 | 18 | 29 | 11 | 10 | 10 | 3 |
| Migratory DCs | 74 | 75 | 22 | 34 | 19 | 8 | 7 | 7 | 5 | 6 | 9 | 6 | 15 | 3 | 8 |
| Plasmablasts | 28 | 23 | 7 | 16 | 0 | 4 | 6 | 3 | 1 | 3 | 1 | 2 | 7 | 1 | 0 |
| Regulatory T cells | 377 | 283 | 96 | 79 | 108 | 63 | 30 | 33 | 44 | 12 | 81 | 34 | 45 | 7 | 28 |
| Tcm/Naive helper T cells | 328 | 143 | 68 | 17 | 58 | 29 | 22 | 18 | 38 | 14 | 92 | 47 | 40 | 9 | 19 |
| Tem/Effector helper T cells | 1221 | 1060 | 344 | 290 | 426 | 117 | 85 | 85 | 180 | 27 | 240 | 132 | 252 | 27 | 76 |
| Tem/Temra cytotoxic T cells | 768 | 483 | 153 | 131 | 199 | 52 | 53 | 48 | 104 | 38 | 191 | 86 | 129 | 16 | 51 |
| Tem/Trm cytotoxic T cells | 785 | 403 | 139 | 148 | 116 | 65 | 93 | 87 | 90 | 39 | 152 | 92 | 74 | 22 | 71 |
Print Subseting all cell type clusters
# Print out the clusters
for key, value in bal_celltype.items():
print(f"{key} = {value}")
cluster0 = AnnData object with n_obs × n_vars = 66650 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster1 = AnnData object with n_obs × n_vars = 450 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster2 = AnnData object with n_obs × n_vars = 26 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster3 = AnnData object with n_obs × n_vars = 156 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster4 = AnnData object with n_obs × n_vars = 328 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster5 = AnnData object with n_obs × n_vars = 324 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster6 = AnnData object with n_obs × n_vars = 349 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster7 = AnnData object with n_obs × n_vars = 149 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster8 = AnnData object with n_obs × n_vars = 51 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster9 = AnnData object with n_obs × n_vars = 660 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster10 = AnnData object with n_obs × n_vars = 471 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster11 = AnnData object with n_obs × n_vars = 2281 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster12 = AnnData object with n_obs × n_vars = 1251 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
cluster13 = AnnData object with n_obs × n_vars = 1188 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
obsp: 'connectivities', 'distances'
Ploting Subseting UMAP based on celltypist_cell_label_fine
import matplotlib.gridspec as gridspec
cell_subset1=bal_celltype['cluster0']
cell_subset2=bal_celltype['cluster1']
cell_subset3=bal_celltype['cluster2']
cell_subset4=bal_celltype['cluster3']
cell_subset5=bal_celltype['cluster4']
cell_subset6=bal_celltype['cluster5']
cell_subset7=bal_celltype['cluster6']
cell_subset8=bal_celltype['cluster7']
cell_subset9=bal_celltype['cluster8']
cell_subset10=bal_celltype['cluster9']
cell_subset11=bal_celltype['cluster10']
cell_subset12=bal_celltype['cluster11']
cell_subset13=bal_celltype['cluster12']
cell_subset14=bal_celltype['cluster13']
# List of actual AnnData objects for plotting
bal_datasets = [
cell_subset1, cell_subset2, cell_subset3, cell_subset4, cell_subset5, cell_subset6, cell_subset7, cell_subset8,
cell_subset9, cell_subset10, cell_subset11, cell_subset12, cell_subset13, cell_subset14
# Add more clusters or datasets as needed
]
# Ensure you have 19 datasets or adjust the grid size accordingly
fig = plt.figure(figsize=(20, 16))
gs = gridspec.GridSpec(5, 4, figure=fig)
# Plot UMAPs in the grid layout
for i, adata in enumerate(bal_datasets):
ax = fig.add_subplot(gs[i])
sc.pl.umap(adata, ax=ax, show=False, title=f'UMAP Plot {i+1}', color='celltypist_cell_label_fine')
# Hide the last subplot if there are fewer than 15 datasets
if len(bal_datasets) < 15:
ax_empty = fig.add_subplot(gs[-1])
ax_empty.axis('off')
# Adjust layout
plt.tight_layout()
plt.show()
# List of actual AnnData objects for plotting
# Ensure you have 19 datasets or adjust the grid size accordingly
fig = plt.figure(figsize=(20, 16))
gs = gridspec.GridSpec(5, 4, figure=fig)
# Plot UMAPs in the grid layout
for i, adata in enumerate(bal_datasets):
ax = fig.add_subplot(gs[i])
sc.pl.umap(adata, ax=ax, show=False, title=f'BAL cluster {i+1}', color='type')
# Hide the last subplot if there are fewer than 20 datasets
if len(bal_datasets) < 15:
ax_empty = fig.add_subplot(gs[-1])
ax_empty.axis('off')
# Adjust layout
plt.tight_layout()
plt.show()
def perform_deseq_analysis(cell_data, output_file, grouping_column="type", condition1="Sarcoidosis", condition2="Healthy", top_n_genes=10):
# List to store summarized data for each sample
summarized_samples = []
# Loop over each unique sample in the dataset
for sample in cell_data.obs['sample'].unique():
# Subset the data for the current sample
sample_data = cell_data[cell_data.obs['sample'] == sample]
# Sum the counts for the sample and create a new AnnData object
sample_summary = sc.AnnData(
X=sample_data.X.sum(axis=0), # Summing counts across cells for each sample
var=sample_data.var[[]] # Keep the gene names but no additional information
)
# Assign sample name and type (grouping_column) information
sample_summary.obs_names = [sample]
sample_summary.obs[grouping_column] = sample_data.obs[grouping_column].iloc[0]
# Append the summarized data for this sample to the list
summarized_samples.append(sample_summary)
# Combine all the summarized sample data into one AnnData object
combined_data = sc.concat(summarized_samples)
# Convert the combined data to a pandas DataFrame (counts matrix)
counts_matrix = pd.DataFrame(combined_data.X.astype(int), columns=combined_data.var_names)
# Set up the DESeq2 inference object with multiple CPUs for faster processing
inference = DefaultInference(n_cpus=8)
# Create a DESeqDataSet for differential expression analysis
dds = DeseqDataSet(
counts=counts_matrix, # The counts matrix
metadata=combined_data.obs, # Metadata including sample grouping info
design_factors=grouping_column, # The grouping factor (e.g., "type")
refit_cooks=True, # Refit to reduce outliers (optional)
inference=inference # Parallelization settings
)
# Run DESeq2 analysis
dds.deseq2()
# Perform statistical analysis with DESeq2 for the given conditions
stat_results = DeseqStats(dds, contrast=(grouping_column, condition1, condition2))
# Display a summary of the statistical results
print(stat_results.summary())
# Get the differential expression results as a DataFrame
de_results = stat_results.results_df
# Plot an MA plot to visualize results
stat_results.plot_MA(s=10)
# Sort the results by the statistic value for easier interpretation
sorted_results = de_results.sort_values('stat', ascending=False)
# Save the sorted results to a CSV file
sorted_results.to_csv(output_file)
# Extract the top N differentially expressed genes
top_genes = sorted_results.index[:top_n_genes].tolist()
# Generate a violin plot for the top N genes in the original dataset (cell_data)
sc.pl.violin(cell_data, keys=top_genes, groupby=grouping_column, jitter=0.4, rotation=90, size=1)
# Return the sorted results DataFrame for further inspection if needed
return sorted_results
cell_subset1.layers["counts"] = cell_subset1.X.copy()
cell_subset1
AnnData object with n_obs × n_vars = 66650 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
cell_subset1 counts
print(cell_subset1.obs['type'].value_counts().sort_index())
type Healthy 43008 Sarcoidosis 23642 Name: count, dtype: int64
print(cell_subset1.obs['sample'].value_counts().sort_index())
sample BAL-Sarc-1 9983 BAL-Sarc-2 10288 BAL-Sarc-3 3371 BAL-healthy-1 5127 BAL-healthy-2 5312 BAL-healthy-3 3975 BAL-healthy-4 3862 BAL-healthy-5 5002 BAL-healthy-6 3813 BAL-healthy-7 4835 BAL-healthy-8 5411 BAL-healthy-9 2298 BAL-healthy-10 3373 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (7, 8)}):
sc.pl.umap(cell_subset1, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 10)
sc.pl.rank_genes_groups_violin(cell_subset1, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset1, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset1, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset1, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
Sarcoidosis vs. Healthy based on Wilcoxon test summary
scores_wilcox_cl1= sc.get.rank_genes_groups_df(cell_subset1, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl1.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl1.csv")
scores_wilcox_cl1
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | IFI30 | 192.243774 | 3.572342 | 0.0 | 0.0 |
| 1 | ATP6V0C | 176.765686 | 7.920133 | 0.0 | 0.0 |
| 2 | RNASEK | 173.965820 | 4.534781 | 0.0 | 0.0 |
| 3 | EEF1G | 166.126831 | 5.980704 | 0.0 | 0.0 |
| 4 | NME2 | 163.472763 | 5.726843 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... |
| 14029 | FOS | -121.345772 | -2.284603 | 0.0 | 0.0 |
| 14030 | HP | -122.202271 | -2.881449 | 0.0 | 0.0 |
| 14031 | HLA-DRB5 | -122.246712 | -2.887452 | 0.0 | 0.0 |
| 14032 | DUSP1 | -145.877945 | -2.766120 | 0.0 | 0.0 |
| 14033 | FKBP2 | -180.421143 | -5.426423 | 0.0 | 0.0 |
14034 rows × 5 columns
Deseq2 execution
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result1 = perform_deseq_analysis(cell_subset1, "/home/jana/BAL_GEX/cl_deseq_result1.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. Fitting dispersions... ... done in 6.76 seconds. Fitting dispersion trend curve... ... done in 2.36 seconds. Fitting MAP dispersions... ... done in 7.88 seconds. Fitting LFCs... ... done in 5.88 seconds. Refitting 31 outliers. Fitting dispersions... ... done in 0.06 seconds. Fitting MAP dispersions... ... done in 0.07 seconds. Fitting LFCs... ... done in 0.06 seconds. Running Wald tests... ... done in 7.03 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue \
LINC00115 76.897824 -0.296700 0.334717 -0.886420 0.375391
FAM41C 57.768631 -0.697438 0.361862 -1.927357 0.053935
NOC2L 1341.884474 -0.352085 0.105615 -3.333681 0.000857
KLHL17 78.453455 -0.601849 0.307835 -1.955104 0.050571
PLEKHN1 17.459038 0.018165 0.492443 0.036887 0.970575
... ... ... ... ... ...
AC011043.1 21.615770 -1.591017 0.789067 -2.016327 0.043766
AL354822.1 177.000122 -0.351962 0.462642 -0.760764 0.446798
AL592183.1 131.363044 -1.906006 0.502836 -3.790514 0.000150
AC240274.1 416.963286 -1.246492 0.405322 -3.075312 0.002103
AC007325.4 366.527747 -1.390173 0.541803 -2.565830 0.010293
padj
LINC00115 0.573394
FAM41C 0.156252
NOC2L 0.006911
KLHL17 0.148783
PLEKHN1 0.987050
... ...
AC011043.1 0.134454
AL354822.1 0.637652
AL592183.1 0.001708
AC240274.1 0.013834
AC007325.4 0.046086
[14034 rows x 6 columns]
None
cl_deseq_result1
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| DBNDD2 | 932.105957 | 8.758383 | 0.219891 | 39.830603 | 0.000000e+00 | 0.000000e+00 |
| ARHGEF18 | 261.462349 | 6.379437 | 0.174756 | 36.504863 | 9.284495e-292 | 6.507967e-288 |
| ANKHD1 | 1252.733555 | 2.400446 | 0.074122 | 32.385012 | 4.462110e-230 | 2.085144e-226 |
| AL627171.2 | 720.842963 | 7.919702 | 0.264271 | 29.968086 | 2.557763e-197 | 8.964318e-194 |
| TRAPPC5 | 1926.191424 | 4.336261 | 0.161934 | 26.777894 | 5.846782e-158 | 1.366101e-154 |
| ... | ... | ... | ... | ... | ... | ... |
| FKBP2 | 3991.784572 | -3.708628 | 0.192519 | -19.263698 | 1.083569e-82 | 6.329398e-80 |
| AC005332.7 | 381.425931 | -5.696156 | 0.295224 | -19.294350 | 5.991317e-83 | 3.817831e-80 |
| THTPA | 546.481101 | -5.701094 | 0.293277 | -19.439263 | 3.592906e-84 | 2.398521e-81 |
| FAM220A | 392.480351 | -5.557277 | 0.263631 | -21.079751 | 1.220227e-98 | 9.503537e-96 |
| CHTF8 | 1272.819381 | -4.520217 | 0.157522 | -28.695869 | 4.296028e-181 | 1.204520e-177 |
14034 rows × 6 columns
cell_subset2.layers["counts"] = cell_subset2.X.copy()
cell_subset2
AnnData object with n_obs × n_vars = 450 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
Cluster 02: 'CD16- NK cells' counts
print(cell_subset2.obs['type'].value_counts().sort_index())
type Healthy 221 Sarcoidosis 229 Name: count, dtype: int64
print(cell_subset2.obs['sample'].value_counts().sort_index())
sample BAL-Sarc-1 34 BAL-Sarc-2 99 BAL-Sarc-3 96 BAL-healthy-1 33 BAL-healthy-2 15 BAL-healthy-3 13 BAL-healthy-4 20 BAL-healthy-5 10 BAL-healthy-6 46 BAL-healthy-7 30 BAL-healthy-8 34 BAL-healthy-9 5 BAL-healthy-10 15 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset2, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset2, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset2, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset2, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset2, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
Sarcoidosis vs. Healthy based on Wilcoxon test summary
scores_wilcox_cl2= sc.get.rank_genes_groups_df(cell_subset2, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl2.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl2.csv")
scores_wilcox_cl2
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | TMSB4X | 12.913388 | 0.488026 | 3.782832e-38 | 5.308827e-34 |
| 1 | CRIP1 | 11.902620 | 3.112091 | 1.146882e-32 | 8.047669e-29 |
| 2 | B2M | 10.707682 | 0.389938 | 9.367830e-27 | 4.139850e-23 |
| 3 | EEF1G | 10.686292 | 6.289941 | 1.179949e-26 | 4.139850e-23 |
| 4 | MTRNR2L12 | 10.603632 | 3.774621 | 2.866274e-26 | 8.045058e-23 |
| ... | ... | ... | ... | ... | ... |
| 14029 | JUN | -5.960917 | -2.178349 | 2.508254e-09 | 1.257172e-06 |
| 14030 | TSC22D3 | -6.009136 | -2.002730 | 1.865149e-09 | 9.694630e-07 |
| 14031 | PNISR | -6.319472 | -2.017260 | 2.624588e-10 | 1.416672e-07 |
| 14032 | LUC7L3 | -6.415183 | -2.154715 | 1.406539e-10 | 8.582331e-08 |
| 14033 | KLF6 | -7.667041 | -2.416080 | 1.760097e-14 | 1.764371e-11 |
14034 rows × 5 columns
DEseq2 execution
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result2 = perform_deseq_analysis(cell_subset2, "/home/jana/BAL_GEX/cl_deseq_result2.csv", top_n_genes=10)
Fitting size factors... ... done in 0.01 seconds. Fitting dispersions... ... done in 5.65 seconds. Fitting dispersion trend curve... ... done in 4.24 seconds. Fitting MAP dispersions... ... done in 18.49 seconds. Fitting LFCs... ... done in 5.33 seconds. Refitting 2 outliers. Fitting dispersions... ... done in 0.01 seconds. Fitting MAP dispersions... ... done in 0.03 seconds. Fitting LFCs... ... done in 0.01 seconds. Running Wald tests... ... done in 8.31 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.315917 -2.699021 3.243619 -0.832102 0.405352 NaN
FAM41C 0.831630 -2.347661 4.057575 -0.578587 0.562868 NaN
NOC2L 5.014431 -0.351497 0.767798 -0.457798 0.647097 0.956955
KLHL17 0.000000 NaN NaN NaN NaN NaN
PLEKHN1 0.767122 -2.087281 3.682865 -0.566755 0.570881 NaN
... ... ... ... ... ... ...
AC011043.1 0.168904 -0.349077 2.876056 -0.121374 0.903395 NaN
AL354822.1 0.774459 -1.049347 2.515857 -0.417093 0.676610 NaN
AL592183.1 0.705932 -2.950792 3.559669 -0.828951 0.407132 NaN
AC240274.1 0.000000 NaN NaN NaN NaN NaN
AC007325.4 0.437971 0.067824 3.752846 0.018073 0.985581 NaN
[14034 rows x 6 columns]
None
cl_deseq_result2
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| RNASEK | 11.098165 | 4.362670 | 0.324581 | 13.440916 | 3.481010e-41 | 2.587435e-37 |
| GABARAP | 9.491035 | 3.667551 | 0.297119 | 12.343731 | 5.265799e-35 | 1.957034e-31 |
| NME2 | 9.479106 | 6.398722 | 0.705141 | 9.074382 | 1.143241e-19 | 2.832570e-16 |
| ATP6V0C | 6.749394 | 7.059417 | 0.974394 | 7.244929 | 4.326667e-13 | 8.040029e-10 |
| POLD4 | 3.533002 | 2.753728 | 0.381852 | 7.211508 | 5.533568e-13 | 8.226202e-10 |
| ... | ... | ... | ... | ... | ... | ... |
| PNMA3 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| SLC6A8 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| TMLHE-AS1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| SPRY3 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC240274.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset3.layers["counts"] = cell_subset3.X.copy()
cell_subset3
AnnData object with n_obs × n_vars = 26 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset3.obs['type'].value_counts().sort_index())
type Healthy 4 Sarcoidosis 22 Name: count, dtype: int64
print(cell_subset3.obs['sample'].value_counts().sort_index())
sample BAL-Sarc-1 1 BAL-Sarc-2 1 BAL-Sarc-3 20 BAL-healthy-8 4 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset3, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset3, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset3, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset3, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset3, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl3= sc.get.rank_genes_groups_df(cell_subset3, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl3.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl3.csv")
scores_wilcox_cl3
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | S100A9 | 2.913743 | 2.627406 | 0.003571 | 1.000000 |
| 1 | B2M | 2.913743 | 0.777922 | 0.003571 | 1.000000 |
| 2 | H3F3A | 2.913743 | 1.228228 | 0.003571 | 1.000000 |
| 3 | CSF3R | 2.878210 | 4.939673 | 0.003999 | 1.000000 |
| 4 | SOD2 | 2.842676 | 34.603874 | 0.004474 | 1.000000 |
| ... | ... | ... | ... | ... | ... |
| 14029 | OPTN | -3.126944 | -32.526829 | 0.001766 | 0.953414 |
| 14030 | COX4I1 | -3.126944 | -6.850245 | 0.001766 | 0.953414 |
| 14031 | SLC25A5 | -3.126944 | -32.965893 | 0.001766 | 0.953414 |
| 14032 | CD81 | -3.126944 | -7.544667 | 0.001766 | 0.953414 |
| 14033 | IFI6 | -3.126944 | -33.720715 | 0.001766 | 0.953414 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result3 = perform_deseq_analysis(cell_subset3, "/home/jana/BAL_GEX/cl_deseq_result3.csv", top_n_genes=10)
Fitting size factors... ... done in 0.00 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), Fitting dispersions... ... done in 1.83 seconds. Fitting dispersion trend curve... ... done in 1.28 seconds. Fitting MAP dispersions... ... done in 2.76 seconds. Fitting LFCs... ... done in 3.99 seconds. Refitting 0 outliers. Running Wald tests... ... done in 7.30 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.0 NaN NaN NaN NaN NaN
FAM41C 0.0 NaN NaN NaN NaN NaN
NOC2L 0.0 NaN NaN NaN NaN NaN
KLHL17 0.0 NaN NaN NaN NaN NaN
PLEKHN1 0.0 NaN NaN NaN NaN NaN
... ... ... ... ... ... ...
AC011043.1 0.0 NaN NaN NaN NaN NaN
AL354822.1 0.0 NaN NaN NaN NaN NaN
AL592183.1 0.0 NaN NaN NaN NaN NaN
AC240274.1 0.0 NaN NaN NaN NaN NaN
AC007325.4 0.0 NaN NaN NaN NaN NaN
[14034 rows x 6 columns]
None
cl_deseq_result3
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| MTRNR2L12 | 4.858237 | 5.757824 | 5.367899 | 1.072640 | 0.283433 | 0.961732 |
| SOD2 | 6.836391 | 6.245394 | 5.834117 | 1.070495 | 0.284396 | 0.961732 |
| IFI30 | 4.518041 | 5.639953 | 5.322969 | 1.059550 | 0.289349 | 0.961732 |
| CELF2 | 4.518041 | 5.639953 | 5.322969 | 1.059550 | 0.289349 | 0.961732 |
| CSNK1A1 | 3.813343 | 5.387615 | 5.307011 | 1.015188 | 0.310016 | 0.961732 |
| ... | ... | ... | ... | ... | ... | ... |
| AC011043.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AL354822.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AL592183.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC240274.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC007325.4 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset4.layers["counts"] = cell_subset4.X.copy()
cell_subset4
print(cell_subset4.obs['type'].value_counts().sort_index())
print(cell_subset4.obs['sample'].value_counts().sort_index())
type Healthy 83 Sarcoidosis 73 Name: count, dtype: int64 sample BAL-Sarc-1 30 BAL-Sarc-2 27 BAL-Sarc-3 16 BAL-healthy-1 7 BAL-healthy-2 7 BAL-healthy-3 8 BAL-healthy-4 10 BAL-healthy-5 3 BAL-healthy-6 7 BAL-healthy-7 14 BAL-healthy-8 10 BAL-healthy-9 5 BAL-healthy-10 12 Name: count, dtype: int64
cell_subset4
AnnData object with n_obs × n_vars = 156 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset4, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset4, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset4, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset4, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset4, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl4= sc.get.rank_genes_groups_df(cell_subset4, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl4.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl4.csv")
scores_wilcox_cl4
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | EEF1G | 9.907559 | 5.572472 | 3.859589e-23 | 5.416547e-19 |
| 1 | NME2 | 9.460041 | 5.998307 | 3.078223e-21 | 2.159989e-17 |
| 2 | RNASEK | 8.671556 | 4.678363 | 4.262537e-18 | 1.994015e-14 |
| 3 | CRIP1 | 8.021589 | 2.550772 | 1.043858e-15 | 3.662375e-12 |
| 4 | B2M | 7.957658 | 0.487020 | 1.753262e-15 | 4.921056e-12 |
| ... | ... | ... | ... | ... | ... |
| 14029 | HIST1H4C | -5.428824 | -2.108451 | 5.672658e-08 | 2.211391e-05 |
| 14030 | IER2 | -5.453686 | -2.323527 | 4.933625e-08 | 2.036426e-05 |
| 14031 | HIST1H2BG | -5.821290 | -3.883642 | 5.839496e-09 | 3.035240e-06 |
| 14032 | FKBP2 | -6.128515 | -6.242632 | 8.870283e-10 | 5.658434e-07 |
| 14033 | EIF3L | -6.671931 | -2.490670 | 2.524600e-11 | 2.362015e-08 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result4 = perform_deseq_analysis(cell_subset4, "/home/jana/BAL_GEX/cl_deseq_result4.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) Fitting dispersions... ... done in 4.69 seconds. Fitting dispersion trend curve... ... done in 2.09 seconds. Fitting MAP dispersions... ... done in 5.55 seconds. Fitting LFCs... ... done in 4.39 seconds. Refitting 0 outliers. Running Wald tests... ... done in 7.38 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.158812 -1.780710 3.644724 -0.488572 0.625145 NaN
FAM41C 0.096732 -1.492171 4.059466 -0.367578 0.713188 NaN
NOC2L 2.258040 -1.474442 0.915645 -1.610277 0.107337 NaN
KLHL17 0.080733 -1.492171 4.059466 -0.367578 0.713188 NaN
PLEKHN1 0.565953 -1.978495 1.973063 -1.002753 0.315980 NaN
... ... ... ... ... ... ...
AC011043.1 0.000000 NaN NaN NaN NaN NaN
AL354822.1 0.342061 -0.157126 1.877632 -0.083683 0.933308 NaN
AL592183.1 0.134290 -1.780708 4.038580 -0.440924 0.659268 NaN
AC240274.1 0.542315 -1.397328 1.967044 -0.710369 0.477475 NaN
AC007325.4 0.348956 -2.381540 2.582953 -0.922022 0.356517 NaN
[14034 rows x 6 columns]
None
cl_deseq_result4
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| EEF1G | 8.335950 | 4.216183 | 0.477023 | 8.838526 | 9.698969e-19 | 1.125080e-15 |
| RNASEK | 6.678526 | 3.853449 | 0.456811 | 8.435542 | 3.296716e-17 | NaN |
| NME2 | 6.865878 | 5.134170 | 0.615896 | 8.336103 | 7.677890e-17 | 4.453176e-14 |
| GABARAP | 8.714790 | 2.364181 | 0.340493 | 6.943409 | 3.827483e-12 | 1.479960e-09 |
| ATP6V0C | 3.622396 | 5.880791 | 1.012545 | 5.807933 | 6.324898e-09 | NaN |
| ... | ... | ... | ... | ... | ... | ... |
| PNMA3 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| SLC6A8 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| F8 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| MTCP1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC011043.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset5.layers["counts"] = cell_subset5.X.copy()
cell_subset5
print(cell_subset5.obs['type'].value_counts().sort_index())
print(cell_subset5.obs['sample'].value_counts().sort_index())
type Healthy 215 Sarcoidosis 113 Name: count, dtype: int64 sample BAL-Sarc-1 43 BAL-Sarc-2 40 BAL-Sarc-3 30 BAL-healthy-1 13 BAL-healthy-2 23 BAL-healthy-3 19 BAL-healthy-4 9 BAL-healthy-5 16 BAL-healthy-6 40 BAL-healthy-7 17 BAL-healthy-8 32 BAL-healthy-9 18 BAL-healthy-10 28 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset5, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset5, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset5, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset5, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset5, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl5= sc.get.rank_genes_groups_df(cell_subset5, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl5.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl5.csv")
scores_wilcox_cl5
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | IFI30 | 13.263009 | 3.747166 | 3.793626e-40 | 5.323975e-36 |
| 1 | EEF1G | 12.825585 | 5.759164 | 1.178930e-37 | 8.272550e-34 |
| 2 | RNASEK | 12.008938 | 3.973756 | 3.189132e-33 | 1.491876e-29 |
| 3 | B2M | 11.499836 | 0.583893 | 1.321667e-30 | 4.637070e-27 |
| 4 | FTL | 11.123675 | 0.670867 | 9.622413e-29 | 2.700819e-25 |
| ... | ... | ... | ... | ... | ... |
| 14029 | HIST1H2BG | -7.690450 | -5.750666 | 1.466184e-14 | 6.637555e-12 |
| 14030 | EIF3L | -7.707604 | -2.233089 | 1.282019e-14 | 5.997285e-12 |
| 14031 | FKBP2 | -7.957561 | -5.508389 | 1.754645e-15 | 8.794530e-13 |
| 14032 | DUSP1 | -8.027401 | -2.910961 | 9.955911e-16 | 5.174861e-13 |
| 14033 | JUN | -8.678023 | -2.980392 | 4.027070e-18 | 2.825795e-15 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result5 = perform_deseq_analysis(cell_subset5, "/home/jana/BAL_GEX/cl_deseq_result5.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) Fitting dispersions... ... done in 5.51 seconds. Fitting dispersion trend curve... ... done in 2.41 seconds. Fitting MAP dispersions... ... done in 5.61 seconds. Fitting LFCs... ... done in 5.47 seconds. Refitting 1 outliers. Fitting dispersions... ... done in 0.01 seconds. Fitting MAP dispersions... ... done in 0.01 seconds. Fitting LFCs... ... done in 0.01 seconds. Running Wald tests... ... done in 7.25 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.362778 0.534966 1.980012 0.270183 0.787019 NaN
FAM41C 0.158913 0.365645 3.214318 0.113755 0.909432 NaN
NOC2L 5.315026 -1.195779 0.629423 -1.899803 0.057459 0.415905
KLHL17 0.166057 -0.910388 3.423198 -0.265947 0.790280 NaN
PLEKHN1 0.086414 0.380212 3.829465 0.099286 0.920911 NaN
... ... ... ... ... ... ...
AC011043.1 0.064491 -0.596795 4.061370 -0.146944 0.883176 NaN
AL354822.1 0.327577 -0.258913 2.192471 -0.118092 0.905995 NaN
AL592183.1 0.376742 -0.770317 1.989616 -0.387168 0.698632 NaN
AC240274.1 0.552153 -2.241147 2.360575 -0.949407 0.342413 NaN
AC007325.4 0.399160 -1.875746 2.480201 -0.756288 0.449477 NaN
[14034 rows x 6 columns]
None
cl_deseq_result5
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| NME2 | 12.831990 | 5.911147 | 0.526845 | 11.219890 | 3.256741e-29 | 2.720356e-25 |
| RNASEK | 21.343759 | 3.236541 | 0.290753 | 11.131566 | 8.807447e-29 | 3.678430e-25 |
| GABARAP | 24.456822 | 2.207544 | 0.228535 | 9.659565 | 4.477598e-22 | 1.246713e-18 |
| ATP6V0C | 10.778903 | 6.529507 | 0.693145 | 9.420124 | 4.505565e-21 | 9.408746e-18 |
| IFI30 | 28.620390 | 2.705530 | 0.293517 | 9.217639 | 3.037129e-20 | 5.073828e-17 |
| ... | ... | ... | ... | ... | ... | ... |
| LINC02243 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| LINC00632 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC244197.2 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| GABRE | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| PNMA3 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset6.layers["counts"] = cell_subset6.X.copy()
cell_subset6
AnnData object with n_obs × n_vars = 324 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset6.obs['type'].value_counts().sort_index())
print(cell_subset6.obs['sample'].value_counts().sort_index())
type Healthy 247 Sarcoidosis 77 Name: count, dtype: int64 sample BAL-Sarc-1 47 BAL-Sarc-2 18 BAL-Sarc-3 12 BAL-healthy-1 24 BAL-healthy-2 33 BAL-healthy-3 15 BAL-healthy-4 16 BAL-healthy-5 32 BAL-healthy-6 25 BAL-healthy-7 21 BAL-healthy-8 39 BAL-healthy-9 13 BAL-healthy-10 29 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset6, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset6, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset6, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset6, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset6, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl6= sc.get.rank_genes_groups_df(cell_subset6, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl6.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl6.csv")
scores_wilcox_cl6
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | CRIP1 | 12.656345 | 1.982860 | 1.031923e-36 | 1.448200e-32 |
| 1 | EEF1G | 12.465459 | 6.186185 | 1.152067e-35 | 8.084056e-32 |
| 2 | ATP6V0C | 11.996602 | 7.897579 | 3.701842e-33 | 1.731722e-29 |
| 3 | IFI30 | 11.841942 | 3.365353 | 2.369028e-32 | 8.311734e-29 |
| 4 | GABARAP | 11.722116 | 2.690197 | 9.818909e-32 | 2.755971e-28 |
| ... | ... | ... | ... | ... | ... |
| 14029 | HP | -8.098751 | -2.312070 | 5.552611e-16 | 3.388058e-13 |
| 14030 | NUCB2 | -8.200465 | -1.514539 | 2.394594e-16 | 1.545341e-13 |
| 14031 | CHD1L | -8.895739 | -4.893092 | 5.803223e-19 | 6.264802e-16 |
| 14032 | DUSP1 | -9.167439 | -2.621006 | 4.843930e-20 | 6.179973e-17 |
| 14033 | FKBP2 | -11.677528 | -5.709217 | 1.660532e-31 | 3.329130e-28 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result6 = perform_deseq_analysis(cell_subset6, "/home/jana/BAL_GEX/cl_deseq_result6.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. Fitting dispersions... ... done in 5.54 seconds. Fitting dispersion trend curve... ... done in 1.59 seconds. Fitting MAP dispersions... ... done in 8.56 seconds. Fitting LFCs... ... done in 4.66 seconds. Refitting 1 outliers. Fitting dispersions... ... done in 0.01 seconds. Fitting MAP dispersions... ... done in 0.01 seconds. Fitting LFCs... ... done in 0.01 seconds. Running Wald tests... ... done in 7.46 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.240787 -0.292116 2.230056 -0.130991 0.895783 NaN
FAM41C 0.376103 -0.677308 2.733639 -0.247768 0.804314 NaN
NOC2L 3.967722 -1.077031 0.671299 -1.604399 0.108626 NaN
KLHL17 0.066548 0.279526 2.006240 0.139328 0.889191 NaN
PLEKHN1 0.041630 0.279521 4.092563 0.068300 0.945547 NaN
... ... ... ... ... ... ...
AC011043.1 0.000000 NaN NaN NaN NaN NaN
AL354822.1 0.987483 3.841624 2.521831 1.523347 0.127672 NaN
AL592183.1 0.199157 -0.009014 3.223168 -0.002797 0.997769 NaN
AC240274.1 1.883581 -1.578118 1.125564 -1.402069 0.160895 NaN
AC007325.4 1.915510 -2.210996 1.479777 -1.494141 0.135139 NaN
[14034 rows x 6 columns]
None
cl_deseq_result6
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| RNASEK | 15.749011 | 3.476809 | 0.264635 | 13.138107 | 1.991462e-39 | NaN |
| NME2 | 11.969965 | 5.485449 | 0.450448 | 12.177770 | 4.082947e-34 | NaN |
| EEF1G | 13.524791 | 5.618934 | 0.501977 | 11.193601 | 4.382666e-29 | NaN |
| TRAPPC5 | 8.040576 | 4.433170 | 0.431185 | 10.281371 | 8.550247e-25 | NaN |
| GNG10 | 7.636460 | 3.468523 | 0.372533 | 9.310645 | 1.270584e-20 | NaN |
| ... | ... | ... | ... | ... | ... | ... |
| PNMA3 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| SLC6A8 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| MTCP1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| TMLHE-AS1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC011043.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset7.layers["counts"] = cell_subset7.X.copy()
cell_subset7
AnnData object with n_obs × n_vars = 349 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset7.obs['type'].value_counts().sort_index())
print(cell_subset7.obs['sample'].value_counts().sort_index())
type Healthy 188 Sarcoidosis 161 Name: count, dtype: int64 sample BAL-Sarc-1 40 BAL-Sarc-2 48 BAL-Sarc-3 73 BAL-healthy-1 61 BAL-healthy-2 19 BAL-healthy-3 20 BAL-healthy-4 7 BAL-healthy-5 18 BAL-healthy-6 29 BAL-healthy-7 11 BAL-healthy-8 10 BAL-healthy-9 10 BAL-healthy-10 3 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset7, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset7, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset7, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset7, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset7, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl7= sc.get.rank_genes_groups_df(cell_subset7, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl7.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl7.csv")
scores_wilcox_cl7
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | EEF1G | 12.682219 | 6.602707 | 7.420148e-37 | 1.041344e-32 |
| 1 | MTRNR2L12 | 11.011797 | 3.541509 | 3.352490e-28 | 2.352442e-24 |
| 2 | B2M | 10.331176 | 0.574998 | 5.093219e-25 | 2.382608e-21 |
| 3 | RNASEK | 10.198138 | 4.491120 | 2.021090e-24 | 7.090996e-21 |
| 4 | HLA-DRB1 | 10.119912 | 0.679437 | 4.508194e-24 | 1.265360e-20 |
| ... | ... | ... | ... | ... | ... |
| 14029 | FKBP2 | -6.155921 | -6.466157 | 7.464253e-10 | 2.992952e-07 |
| 14030 | MAP3K1 | -6.158050 | -2.604691 | 7.364639e-10 | 2.992952e-07 |
| 14031 | EIF2AK2 | -6.374103 | -2.276145 | 1.840373e-10 | 8.331548e-08 |
| 14032 | EIF3L | -6.507140 | -2.299397 | 7.659486e-11 | 3.583107e-08 |
| 14033 | JUN | -7.767273 | -2.819345 | 8.019357e-15 | 6.252425e-12 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result7 = perform_deseq_analysis(cell_subset7, "/home/jana/BAL_GEX/cl_deseq_result7.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) Fitting dispersions... ... done in 5.56 seconds. Fitting dispersion trend curve... ... done in 2.34 seconds. Fitting MAP dispersions... ... done in 5.13 seconds. Fitting LFCs... ... done in 5.52 seconds. Refitting 5 outliers. Fitting dispersions... ... done in 0.02 seconds. Fitting MAP dispersions... ... done in 0.02 seconds. Fitting LFCs... ... done in 0.02 seconds. Running Wald tests... ... done in 8.34 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.642194 -3.501013 2.649596 -1.321338 0.186389 NaN
FAM41C 0.116120 -1.600944 4.069215 -0.393428 0.694003 NaN
NOC2L 4.862128 -0.702108 0.647758 -1.083905 0.278407 0.999854
KLHL17 0.000000 NaN NaN NaN NaN NaN
PLEKHN1 0.044782 -1.632465 4.066852 -0.401407 0.688120 NaN
... ... ... ... ... ... ...
AC011043.1 0.025781 -0.340404 3.866528 -0.088039 0.929846 NaN
AL354822.1 0.409574 -1.969858 2.325944 -0.846907 0.397047 NaN
AL592183.1 0.769099 -0.507597 1.720734 -0.294989 0.768002 NaN
AC240274.1 0.365697 -1.761057 3.017011 -0.583709 0.559416 NaN
AC007325.4 0.730523 -2.943997 2.662740 -1.105627 0.268888 NaN
[14034 rows x 6 columns]
None
cl_deseq_result7
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| EEF1G | 14.126544 | 5.628292 | 0.537888 | 10.463682 | 1.268287e-25 | 7.474388e-22 |
| RNASEK | 13.281055 | 3.851548 | 0.369685 | 10.418454 | 2.042462e-25 | 7.474388e-22 |
| NME2 | 9.800446 | 5.488429 | 0.595575 | 9.215349 | 3.102659e-20 | 7.569455e-17 |
| GABARAP | 12.622720 | 3.287863 | 0.377261 | 8.715097 | 2.905092e-18 | 5.315591e-15 |
| ATP6V0C | 5.741917 | 6.852391 | 1.112363 | 6.160212 | 7.264761e-10 | 9.027893e-07 |
| ... | ... | ... | ... | ... | ... | ... |
| CD40LG | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC244197.2 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| MAMLD1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| SLC6A8 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| TMLHE-AS1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset8.layers["counts"] = cell_subset8.X.copy()
cell_subset8
AnnData object with n_obs × n_vars = 149 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset8.obs['type'].value_counts().sort_index())
print(cell_subset8.obs['sample'].value_counts().sort_index())
type Healthy 74 Sarcoidosis 75 Name: count, dtype: int64 sample BAL-Sarc-1 22 BAL-Sarc-2 34 BAL-Sarc-3 19 BAL-healthy-1 8 BAL-healthy-2 7 BAL-healthy-3 7 BAL-healthy-4 5 BAL-healthy-5 6 BAL-healthy-6 9 BAL-healthy-7 6 BAL-healthy-8 15 BAL-healthy-9 3 BAL-healthy-10 8 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset8, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset8, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset8, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset8, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset8, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl8= sc.get.rank_genes_groups_df(cell_subset8, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl8.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl8.csv")
scores_wilcox_cl8
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | IFI30 | 9.685208 | 4.167756 | 3.484978e-22 | 4.890818e-18 |
| 1 | NME2 | 8.270963 | 6.181851 | 1.328815e-16 | 9.324298e-13 |
| 2 | RNASEK | 7.735638 | 3.598285 | 1.028861e-14 | 3.889222e-11 |
| 3 | EEF1G | 7.726146 | 32.481079 | 1.108514e-14 | 3.889222e-11 |
| 4 | FTL | 7.023769 | 0.694631 | 2.159615e-12 | 6.061607e-09 |
| ... | ... | ... | ... | ... | ... |
| 14029 | HIST1H4C | -5.366539 | -2.602955 | 8.026171e-08 | 4.022832e-05 |
| 14030 | MYO1G | -5.605727 | -2.535963 | 2.073825e-08 | 1.077928e-05 |
| 14031 | EIF3L | -5.846813 | -2.544686 | 5.010795e-09 | 3.196432e-06 |
| 14032 | SELPLG | -5.968306 | -2.952102 | 2.397302e-09 | 1.682187e-06 |
| 14033 | PNISR | -6.378342 | -2.331063 | 1.790157e-10 | 2.093589e-07 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result8 = perform_deseq_analysis(cell_subset8, "/home/jana/BAL_GEX/cl_deseq_result8.csv", top_n_genes=10)
Fitting size factors... ... done in 0.01 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) Fitting dispersions... ... done in 4.53 seconds. Fitting dispersion trend curve... ... done in 2.47 seconds. Fitting MAP dispersions... ... done in 7.10 seconds. Fitting LFCs... ... done in 4.60 seconds. Refitting 3 outliers. Fitting dispersions... ... done in 0.02 seconds. Fitting MAP dispersions... ... done in 0.02 seconds. Fitting LFCs... ... done in 0.02 seconds. Running Wald tests... ... done in 7.52 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.000000 NaN NaN NaN NaN NaN
FAM41C 0.000000 NaN NaN NaN NaN NaN
NOC2L 0.997809 1.553135 0.886607 1.751774 0.079813 0.716636
KLHL17 0.065705 -1.640166 2.154174 -0.761390 0.446424 NaN
PLEKHN1 0.200424 0.911529 1.779627 0.512202 0.608509 NaN
... ... ... ... ... ... ...
AC011043.1 0.000000 NaN NaN NaN NaN NaN
AL354822.1 0.493858 -2.624040 3.221460 -0.814550 0.415330 NaN
AL592183.1 0.000000 NaN NaN NaN NaN NaN
AC240274.1 0.143590 -1.948170 2.644302 -0.736742 0.461279 NaN
AC007325.4 0.400342 -1.557944 1.714986 -0.908430 0.363651 NaN
[14034 rows x 6 columns]
None
cl_deseq_result8
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| RNASEK | 8.033567 | 2.870620 | 0.433548 | 6.621221 | 3.562432e-11 | 1.976793e-07 |
| IFI30 | 10.612151 | 2.824216 | 0.470675 | 6.000348 | 1.968950e-09 | 5.462852e-06 |
| NME2 | 5.934686 | 5.061644 | 0.866146 | 5.843869 | 5.100219e-09 | 9.433704e-06 |
| EEF1G | 5.273095 | 6.692586 | 1.159731 | 5.770807 | 7.889265e-09 | 1.094438e-05 |
| ATP6V0C | 3.802441 | 5.902562 | 1.155954 | 5.106226 | 3.286574e-07 | 3.259510e-04 |
| ... | ... | ... | ... | ... | ... | ... |
| MTCP1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| TMLHE-AS1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| SPRY3 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC011043.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AL592183.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset9.layers["counts"] = cell_subset9.X.copy()
cell_subset9
AnnData object with n_obs × n_vars = 51 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset9.obs['type'].value_counts().sort_index())
print(cell_subset9.obs['sample'].value_counts().sort_index())
type Healthy 28 Sarcoidosis 23 Name: count, dtype: int64 sample BAL-Sarc-1 7 BAL-Sarc-2 16 BAL-healthy-1 4 BAL-healthy-2 6 BAL-healthy-3 3 BAL-healthy-4 1 BAL-healthy-5 3 BAL-healthy-6 1 BAL-healthy-7 2 BAL-healthy-8 7 BAL-healthy-9 1 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset9, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset9, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset9, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset9, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset9, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl9= sc.get.rank_genes_groups_df(cell_subset9, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl9.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl9.csv")
scores_wilcox_cl9
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | CRIP1 | 6.076466 | 3.172896 | 1.228603e-09 | 0.000017 |
| 1 | RNASEK | 4.921748 | 5.358507 | 8.577466e-07 | 0.006019 |
| 2 | EEF1G | 4.722985 | 7.436644 | 2.324080e-06 | 0.010872 |
| 3 | PFN1 | 4.524222 | 0.611135 | 6.061804e-06 | 0.018516 |
| 4 | FTH1 | 4.467432 | 0.547555 | 7.916390e-06 | 0.018516 |
| ... | ... | ... | ... | ... | ... |
| 14029 | POLR2F | -3.823820 | -5.164629 | 1.314001e-04 | 0.073763 |
| 14030 | DBNL | -3.975258 | -4.570426 | 7.030302e-05 | 0.054813 |
| 14031 | TRNAU1AP | -4.164556 | -3.726077 | 3.119592e-05 | 0.027976 |
| 14032 | HLA-DRB5 | -4.353854 | -30.928776 | 1.337649e-05 | 0.020858 |
| 14033 | CCAR1 | -4.467432 | -3.992137 | 7.916390e-06 | 0.018516 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result9 = perform_deseq_analysis(cell_subset9, "/home/jana/BAL_GEX/cl_deseq_result9.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) Fitting dispersions... ... done in 3.28 seconds. Fitting dispersion trend curve... ... done in 1.82 seconds. Fitting MAP dispersions... ... done in 5.91 seconds. Fitting LFCs... ... done in 4.54 seconds. Refitting 0 outliers. Running Wald tests... ... done in 8.86 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.018837 -0.692559 4.217766 -0.164200 0.869573 0.999562
FAM41C 0.000000 NaN NaN NaN NaN NaN
NOC2L 0.130708 -2.645004 3.659528 -0.722772 0.469820 0.999562
KLHL17 0.037170 -2.279438 2.442596 -0.933203 0.350715 0.999562
PLEKHN1 0.000000 NaN NaN NaN NaN NaN
... ... ... ... ... ... ...
AC011043.1 0.000000 NaN NaN NaN NaN NaN
AL354822.1 0.043685 -2.316235 4.295351 -0.539242 0.589720 0.999562
AL592183.1 0.018837 -0.692559 4.217766 -0.164200 0.869573 0.999562
AC240274.1 0.000000 NaN NaN NaN NaN NaN
AC007325.4 0.000000 NaN NaN NaN NaN NaN
[14034 rows x 6 columns]
None
cl_deseq_result9
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| CRIP1 | 3.493540 | 1.956153 | 0.582857 | 3.356144 | 0.000790 | 0.999562 |
| RNASEK | 1.360242 | 3.998621 | 1.197910 | 3.337997 | 0.000844 | 0.999562 |
| GABARAP | 1.534525 | 2.892684 | 1.020611 | 2.834266 | 0.004593 | 0.999562 |
| SARNP | 0.984332 | 2.944109 | 1.060832 | 2.775283 | 0.005515 | 0.999562 |
| HIST1H3G | 1.179538 | 2.373109 | 0.889869 | 2.666807 | 0.007658 | 0.999562 |
| ... | ... | ... | ... | ... | ... | ... |
| SPRY3 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| MAFIP | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC011043.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC240274.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC007325.4 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset10.layers["counts"] = cell_subset10.X.copy()
cell_subset10
AnnData object with n_obs × n_vars = 660 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset10.obs['type'].value_counts().sort_index())
print(cell_subset10.obs['sample'].value_counts().sort_index())
type Healthy 377 Sarcoidosis 283 Name: count, dtype: int64 sample BAL-Sarc-1 96 BAL-Sarc-2 79 BAL-Sarc-3 108 BAL-healthy-1 63 BAL-healthy-2 30 BAL-healthy-3 33 BAL-healthy-4 44 BAL-healthy-5 12 BAL-healthy-6 81 BAL-healthy-7 34 BAL-healthy-8 45 BAL-healthy-9 7 BAL-healthy-10 28 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset10, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset10, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset10, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset10, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset10, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl10= sc.get.rank_genes_groups_df(cell_subset10, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl10.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl10.csv")
scores_wilcox_cl10
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | CRIP1 | 17.012199 | 3.641234 | 6.668710e-65 | 9.358868e-61 |
| 1 | TMSB4X | 15.659605 | 0.549960 | 2.856655e-55 | 2.004515e-51 |
| 2 | B2M | 15.073028 | 0.514635 | 2.436862e-51 | 1.139964e-47 |
| 3 | S100A4 | 14.830476 | 0.948769 | 9.307041e-50 | 3.265375e-46 |
| 4 | TMSB10 | 14.661144 | 0.474524 | 1.143301e-48 | 3.209017e-45 |
| ... | ... | ... | ... | ... | ... |
| 14029 | KLF6 | -7.126796 | -1.803686 | 1.027321e-12 | 3.516445e-10 |
| 14030 | JUN | -7.704093 | -2.124373 | 1.317763e-14 | 4.998239e-12 |
| 14031 | ARID1B | -8.081944 | -2.589953 | 6.374219e-16 | 2.631053e-13 |
| 14032 | PNISR | -9.146612 | -2.231387 | 5.874713e-20 | 3.170990e-17 |
| 14033 | FUS | -9.194462 | -2.340726 | 3.768754e-20 | 2.115628e-17 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result10 = perform_deseq_analysis(cell_subset10, "/home/jana/BAL_GEX/cl_deseq_result10.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) Fitting dispersions... ... done in 6.18 seconds. Fitting dispersion trend curve... ... done in 4.22 seconds. Fitting MAP dispersions... ... done in 20.19 seconds. Fitting LFCs... ... done in 6.37 seconds. Refitting 4 outliers. Fitting dispersions... ... done in 0.02 seconds. Fitting MAP dispersions... ... done in 0.03 seconds. Fitting LFCs... ... done in 0.02 seconds. Running Wald tests... ... done in 7.97 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.819109 -1.874481 1.299410 -1.442563 0.149144 NaN
FAM41C 0.000000 NaN NaN NaN NaN NaN
NOC2L 7.637772 -0.954569 0.380722 -2.507256 0.012167 0.121658
KLHL17 0.616054 0.237221 2.429092 0.097658 0.922204 NaN
PLEKHN1 1.722812 -0.314638 1.447477 -0.217370 0.827920 NaN
... ... ... ... ... ... ...
AC011043.1 0.073691 -1.538455 4.053435 -0.379544 0.704284 NaN
AL354822.1 0.853826 -2.575725 2.098584 -1.227363 0.219686 NaN
AL592183.1 0.196485 -1.875584 4.032812 -0.465081 0.641874 NaN
AC240274.1 0.032570 -1.277302 4.072488 -0.313642 0.753793 NaN
AC007325.4 0.147927 -1.841458 4.034718 -0.456403 0.648100 NaN
[14034 rows x 6 columns]
None
cl_deseq_result10
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| RNASEK | 23.931361 | 3.571683 | 0.191326 | 18.668059 | 9.006631e-78 | 6.650496e-74 |
| EEF1G | 20.919697 | 6.631341 | 0.480072 | 13.813216 | 2.121461e-43 | 7.832435e-40 |
| UBE2V1 | 10.701264 | 3.752639 | 0.277249 | 13.535269 | 9.682731e-42 | 2.383243e-38 |
| PSMA6 | 12.021773 | 1.991520 | 0.194540 | 10.237068 | 1.352737e-24 | 2.497152e-21 |
| POLD4 | 9.271854 | 2.180381 | 0.230603 | 9.455119 | 3.226543e-21 | 4.431862e-18 |
| ... | ... | ... | ... | ... | ... | ... |
| SMIM10 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| MAMLD1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| GABRE | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| ZFP92 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| TMLHE-AS1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset11.layers["counts"] = cell_subset11.X.copy()
cell_subset11
AnnData object with n_obs × n_vars = 471 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset11.obs['type'].value_counts().sort_index())
print(cell_subset11.obs['sample'].value_counts().sort_index())
type Healthy 328 Sarcoidosis 143 Name: count, dtype: int64 sample BAL-Sarc-1 68 BAL-Sarc-2 17 BAL-Sarc-3 58 BAL-healthy-1 29 BAL-healthy-2 22 BAL-healthy-3 18 BAL-healthy-4 38 BAL-healthy-5 14 BAL-healthy-6 92 BAL-healthy-7 47 BAL-healthy-8 40 BAL-healthy-9 9 BAL-healthy-10 19 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset11, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset11, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset11, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset11, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset11, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl11= sc.get.rank_genes_groups_df(cell_subset11, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl11.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl11.csv")
scores_wilcox_cl11
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | CRIP1 | 11.047897 | 4.544742 | 2.244097e-28 | 3.149365e-24 |
| 1 | MTRNR2L12 | 10.515601 | 4.041378 | 7.321214e-26 | 4.066439e-22 |
| 2 | TMSB4X | 10.499404 | 0.576220 | 8.692686e-26 | 4.066439e-22 |
| 3 | B2M | 9.599729 | 0.394385 | 8.015497e-22 | 2.812237e-18 |
| 4 | EEF1A1 | 9.565126 | 0.622557 | 1.120649e-21 | 3.145437e-18 |
| ... | ... | ... | ... | ... | ... |
| 14029 | FYB1 | -4.613230 | -1.957722 | 3.964599e-06 | 1.686036e-03 |
| 14030 | SYNE2 | -4.866493 | -1.974243 | 1.135957e-06 | 5.142587e-04 |
| 14031 | FUS | -5.179392 | -2.105179 | 2.226102e-07 | 1.041371e-04 |
| 14032 | PNISR | -5.540882 | -2.033932 | 3.009521e-08 | 1.759817e-05 |
| 14033 | JUN | -6.183244 | -2.496010 | 6.279739e-10 | 4.376547e-07 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result11 = perform_deseq_analysis(cell_subset11, "/home/jana/BAL_GEX/cl_deseq_result11.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), Fitting dispersions... ... done in 5.72 seconds. Fitting dispersion trend curve... ... done in 2.73 seconds. Fitting MAP dispersions... ... done in 4.96 seconds. Fitting LFCs... ... done in 5.50 seconds. Refitting 4 outliers. Fitting dispersions... ... done in 0.02 seconds. Fitting MAP dispersions... ... done in 0.02 seconds. Fitting LFCs... ... done in 0.02 seconds. Running Wald tests... ... done in 7.41 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.167781 0.567340 3.746200 0.151444 0.879625 NaN
FAM41C 0.000000 NaN NaN NaN NaN NaN
NOC2L 4.244611 -1.035358 0.968214 -1.069348 0.284913 0.880008
KLHL17 0.532349 -2.201015 3.232242 -0.680956 0.495899 NaN
PLEKHN1 0.387893 -0.242373 2.722464 -0.089027 0.929060 NaN
... ... ... ... ... ... ...
AC011043.1 0.000000 NaN NaN NaN NaN NaN
AL354822.1 0.857421 0.142842 1.770238 0.080691 0.935688 NaN
AL592183.1 1.053596 1.198903 1.903149 0.629958 0.528722 NaN
AC240274.1 0.324079 -1.724633 3.936760 -0.438084 0.661325 NaN
AC007325.4 0.172272 -0.936777 4.073025 -0.229995 0.818095 NaN
[14034 rows x 6 columns]
None
cl_deseq_result11
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| EEF1G | 10.839592 | 6.210625 | 0.714294 | 8.694772 | 3.475264e-18 | 2.085853e-14 |
| NME2 | 6.666161 | 6.220726 | 1.007041 | 6.177234 | 6.523430e-10 | 1.957681e-06 |
| MIF | 15.368977 | 2.783452 | 0.455953 | 6.104689 | 1.030008e-09 | 2.060703e-06 |
| ATP6V0C | 5.407263 | 6.996825 | 1.191234 | 5.873594 | 4.264475e-09 | 6.398844e-06 |
| AL627171.2 | 4.399989 | 6.234688 | 1.129648 | 5.519141 | 3.406616e-08 | 4.089302e-05 |
| ... | ... | ... | ... | ... | ... | ... |
| ZFP92 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| SLC6A8 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| TMLHE-AS1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| SPRY3 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC011043.1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset12.layers["counts"] = cell_subset12.X.copy()
cell_subset12
AnnData object with n_obs × n_vars = 2281 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset12.obs['type'].value_counts().sort_index())
print(cell_subset12.obs['sample'].value_counts().sort_index())
type Healthy 1221 Sarcoidosis 1060 Name: count, dtype: int64 sample BAL-Sarc-1 344 BAL-Sarc-2 290 BAL-Sarc-3 426 BAL-healthy-1 117 BAL-healthy-2 85 BAL-healthy-3 85 BAL-healthy-4 180 BAL-healthy-5 27 BAL-healthy-6 240 BAL-healthy-7 132 BAL-healthy-8 252 BAL-healthy-9 27 BAL-healthy-10 76 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset12, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset12, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset12, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset12, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset12, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl12= sc.get.rank_genes_groups_df(cell_subset12, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl12.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl12.csv")
scores_wilcox_cl12
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | CRIP1 | 31.087542 | 3.472797 | 3.549505e-212 | 4.981376e-208 |
| 1 | TMSB4X | 28.076586 | 0.436775 | 1.892514e-173 | 1.327977e-169 |
| 2 | B2M | 24.909048 | 0.411484 | 5.936806e-137 | 2.777238e-133 |
| 3 | SH3BGRL3 | 24.395325 | 0.487571 | 1.917190e-131 | 6.726461e-128 |
| 4 | TMSB10 | 24.282406 | 0.344397 | 3.007760e-130 | 8.442180e-127 |
| ... | ... | ... | ... | ... | ... |
| 14029 | KLF6 | -12.599648 | -1.783262 | 2.120894e-36 | 9.601490e-34 |
| 14030 | FUS | -13.053679 | -1.965878 | 6.055180e-39 | 2.832613e-36 |
| 14031 | CXCR4 | -13.132016 | -2.098594 | 2.158315e-39 | 1.044475e-36 |
| 14032 | JUN | -14.879807 | -2.025774 | 4.457802e-50 | 2.406184e-47 |
| 14033 | PNISR | -15.904675 | -2.243918 | 5.880802e-57 | 4.126559e-54 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result12 = perform_deseq_analysis(cell_subset12, "/home/jana/BAL_GEX/cl_deseq_result12.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), Fitting dispersions... ... done in 6.44 seconds. Fitting dispersion trend curve... ... done in 2.18 seconds. Fitting MAP dispersions... ... done in 6.12 seconds. Fitting LFCs... ... done in 6.37 seconds. Refitting 9 outliers. Fitting dispersions... ... done in 0.02 seconds. Fitting MAP dispersions... ... done in 0.02 seconds. Fitting LFCs... ... done in 0.02 seconds. Running Wald tests... ... done in 7.52 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 2.666504 -1.661299 1.038355 -1.599934 0.109613 0.592757
FAM41C 0.128481 -0.267041 3.743404 -0.071336 0.943130 NaN
NOC2L 29.416559 -0.482734 0.311500 -1.549712 0.121211 0.619120
KLHL17 0.660642 0.449772 1.708087 0.263319 0.792305 NaN
PLEKHN1 2.591261 -1.072040 1.033535 -1.037256 0.299616 0.826228
... ... ... ... ... ... ...
AC011043.1 0.065001 -1.500534 4.072621 -0.368444 0.712542 NaN
AL354822.1 1.903469 -3.851939 1.717231 -2.243110 0.024890 NaN
AL592183.1 2.510021 0.296514 1.322248 0.224250 0.822563 0.977700
AC240274.1 0.841401 0.294826 2.297430 0.128329 0.897889 NaN
AC007325.4 1.115593 -3.846508 2.349347 -1.637267 0.101575 NaN
[14034 rows x 6 columns]
None
cl_deseq_result12
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| RNASEK | 69.669403 | 3.847914 | 0.264431 | 14.551674 | 5.699120e-48 | 5.510479e-44 |
| NME2 | 44.733752 | 6.293025 | 0.448005 | 14.046786 | 8.061065e-45 | 3.897122e-41 |
| TRAPPC5 | 24.736341 | 5.600736 | 0.448022 | 12.501028 | 7.369233e-36 | 2.375104e-32 |
| AL627171.2 | 30.833065 | 7.709480 | 0.680529 | 11.328665 | 9.463633e-30 | 2.287597e-26 |
| COX16 | 19.552164 | 3.685838 | 0.357415 | 10.312484 | 6.188100e-25 | 9.972123e-22 |
| ... | ... | ... | ... | ... | ... | ... |
| RHOXF1-AS1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| TMEM255A | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| LINC02243 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| GABRE | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| TMLHE-AS1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset13
cell_subset13.layers["counts"] = cell_subset13.X.copy()
cell_subset13
AnnData object with n_obs × n_vars = 1251 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset13.obs['type'].value_counts().sort_index())
print(cell_subset13.obs['sample'].value_counts().sort_index())
type Healthy 768 Sarcoidosis 483 Name: count, dtype: int64 sample BAL-Sarc-1 153 BAL-Sarc-2 131 BAL-Sarc-3 199 BAL-healthy-1 52 BAL-healthy-2 53 BAL-healthy-3 48 BAL-healthy-4 104 BAL-healthy-5 38 BAL-healthy-6 191 BAL-healthy-7 86 BAL-healthy-8 129 BAL-healthy-9 16 BAL-healthy-10 51 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset13, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset13, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset13, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset13, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset13, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl13= sc.get.rank_genes_groups_df(cell_subset13, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl13.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl13.csv")
scores_wilcox_cl13
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | CRIP1 | 23.962482 | 3.403234 | 6.848269e-127 | 9.610861e-123 |
| 1 | TMSB4X | 19.851862 | 0.360006 | 1.061928e-87 | 7.451549e-84 |
| 2 | CD52 | 19.330088 | 0.572348 | 2.999103e-83 | 1.402981e-79 |
| 3 | SH3BGRL3 | 18.123222 | 0.359888 | 2.089876e-73 | 7.332330e-70 |
| 4 | B2M | 17.761309 | 0.363018 | 1.409123e-70 | 3.955127e-67 |
| ... | ... | ... | ... | ... | ... |
| 14029 | PNISR | -9.193421 | -1.771650 | 3.805413e-20 | 1.405399e-17 |
| 14030 | CXCR4 | -9.259647 | -2.203255 | 2.051078e-20 | 7.779684e-18 |
| 14031 | FUS | -9.877226 | -2.018399 | 5.225982e-23 | 2.222468e-20 |
| 14032 | JUN | -11.207541 | -1.954440 | 3.744506e-29 | 2.021169e-26 |
| 14033 | KLF6 | -12.391902 | -2.200589 | 2.891049e-35 | 1.764043e-32 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result13 = perform_deseq_analysis(cell_subset13, "/home/jana/BAL_GEX/cl_deseq_result13.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. /home/jana/my-notebook-venv/lib/python3.8/site-packages/numpy/linalg/linalg.py:2079: RuntimeWarning: invalid value encountered in slogdet sign, logdet = _umath_linalg.slogdet(a, signature=signature) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:667: RuntimeWarning: invalid value encountered in log log_alpha_hat = np.log(alpha_hat) /home/jana/my-notebook-venv/lib/python3.8/site-packages/pydeseq2/utils.py:708: RuntimeWarning: invalid value encountered in log x0=np.log(alpha_hat), Fitting dispersions... ... done in 6.43 seconds. Fitting dispersion trend curve... ... done in 3.67 seconds. Fitting MAP dispersions... ... done in 5.52 seconds. Fitting LFCs... ... done in 5.29 seconds. Refitting 9 outliers. Fitting dispersions... ... done in 0.02 seconds. Fitting MAP dispersions... ... done in 0.02 seconds. Fitting LFCs... ... done in 0.02 seconds. Running Wald tests... ... done in 7.46 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 0.570465 -1.343044 2.067266 -0.649672 0.515904 NaN
FAM41C 0.169413 -1.841242 4.029495 -0.456941 0.647713 NaN
NOC2L 18.289626 -0.381779 0.347688 -1.098052 0.272182 0.736092
KLHL17 1.339335 -1.523718 1.667183 -0.913948 0.360744 NaN
PLEKHN1 2.731696 -0.717272 0.961239 -0.746195 0.455549 NaN
... ... ... ... ... ... ...
AC011043.1 0.058437 -1.183069 4.070716 -0.290629 0.771335 NaN
AL354822.1 0.935286 0.314388 1.562725 0.201179 0.840558 NaN
AL592183.1 0.536345 -1.797201 2.763024 -0.650447 0.515404 NaN
AC240274.1 0.262298 -0.058689 3.505821 -0.016740 0.986644 NaN
AC007325.4 0.593038 -2.971550 2.951804 -1.006689 0.314084 NaN
[14034 rows x 6 columns]
None
cl_deseq_result13
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| RNASEK | 42.553623 | 3.614094 | 0.242365 | 14.911760 | 2.763685e-50 | 1.974929e-46 |
| NME2 | 25.240157 | 7.187903 | 0.577654 | 12.443269 | 1.521473e-35 | 5.436224e-32 |
| TRAPPC5 | 14.073711 | 5.172814 | 0.490742 | 10.540795 | 5.602326e-26 | 1.334474e-22 |
| ATP6V0C | 17.472623 | 7.469937 | 0.744026 | 10.039891 | 1.017862e-23 | 1.818411e-20 |
| GABARAP | 30.502046 | 3.183671 | 0.327402 | 9.724044 | 2.381328e-22 | 3.403394e-19 |
| ... | ... | ... | ... | ... | ... | ... |
| FIRRE | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| GPC4 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| LINC02243 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| RTL8B | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| TMLHE-AS1 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns
cell_subset14
cell_subset14.layers["counts"] = cell_subset14.X.copy()
cell_subset14
AnnData object with n_obs × n_vars = 1188 × 14034
obs: 'type', 'sample', 'percent_chrY', 'XIST-counts', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1', 'S_score', 'G2M_score', 'phase', 'batch', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'umap_density_sample', 'umap_density_leiden', 'leiden_1.0', 'leiden_0.8', 'leiden_0.5', 'leiden_0.4', 'predicted_labels', 'majority_voting', 'conf_score', 'celltypist_cell_label_coarse', 'celltypist_conf_score_coarse', 'over_clustering', 'celltypist_cell_label_fine', 'celltypist_conf_score_fine'
uns: 'celltypist_cell_label_coarse_colors', 'celltypist_cell_label_fine_colors', 'dendrogram_celltypist_cell_label_fine', 'dendrogram_leiden_0.5', 'doublet_info_colors', 'leiden', 'leiden_0.4_colors', 'leiden_0.5_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'logreg', 'neighbors', 'phase_colors', 'sample_colors', 't-test', 't-test_ov', 'type_colors', 'umap', 'umap_density_leiden_params', 'umap_density_sample_params', 'wilcoxon', 'log1p', 'wilcoxon_cp_with_Sarcoidosis', 'wilcoxon_cp_with_Healthy'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals'
layers: 'counts'
obsp: 'connectivities', 'distances'
print(cell_subset14.obs['type'].value_counts().sort_index())
print(cell_subset14.obs['sample'].value_counts().sort_index())
type Healthy 785 Sarcoidosis 403 Name: count, dtype: int64 sample BAL-Sarc-1 139 BAL-Sarc-2 148 BAL-Sarc-3 116 BAL-healthy-1 65 BAL-healthy-2 93 BAL-healthy-3 87 BAL-healthy-4 90 BAL-healthy-5 39 BAL-healthy-6 152 BAL-healthy-7 92 BAL-healthy-8 74 BAL-healthy-9 22 BAL-healthy-10 71 Name: count, dtype: int64
from matplotlib.pyplot import rc_context
with rc_context({'figure.figsize': (6, 6)}):
sc.pl.umap(cell_subset14, color = ['celltypist_cell_label_fine','type','sample'], frameon = False, s = 12)
sc.pl.rank_genes_groups_violin(cell_subset14, n_genes=50, jitter=False, key='wilcoxon_cp_with_Sarcoidosis', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset14, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Sarcoidosis')
WARNING: dendrogram data not found (using key=dendrogram_type). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently. WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Sarcoidosis
sc.pl.rank_genes_groups_violin(cell_subset14, n_genes=50, jitter=False, key='wilcoxon_cp_with_Healthy', scale='width', size=0.0)
sc.pl.rank_genes_groups_heatmap(cell_subset14, n_genes=50, show_gene_labels=True, key='wilcoxon_cp_with_Healthy')
WARNING: Groups are not reordered because the `groupby` categories and the `var_group_labels` are different. categories: Healthy, Sarcoidosis var_group_labels: Healthy
scores_wilcox_cl14= sc.get.rank_genes_groups_df(cell_subset14, group='Sarcoidosis', key='wilcoxon_cp_with_Sarcoidosis')
scores_wilcox_cl14.to_csv("/home/jana/BAL_GEX/scores_wilcox_cl14.csv")
scores_wilcox_cl14
| names | scores | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|
| 0 | CRIP1 | 20.152426 | 3.358052 | 2.562497e-90 | 3.596208e-86 |
| 1 | TMSB4X | 18.704054 | 0.445424 | 4.587989e-78 | 3.219392e-74 |
| 2 | B2M | 17.925303 | 0.450336 | 7.484166e-72 | 3.501093e-68 |
| 3 | FTL | 16.733421 | 0.546338 | 7.481563e-63 | 2.624906e-59 |
| 4 | FTH1 | 16.584904 | 0.490305 | 8.960880e-62 | 2.515140e-58 |
| ... | ... | ... | ... | ... | ... |
| 14029 | CD8B | -9.294380 | -2.292319 | 1.480667e-20 | 7.165406e-18 |
| 14030 | FUS | -9.590877 | -2.098505 | 8.733940e-22 | 4.377575e-19 |
| 14031 | KLRD1 | -11.041926 | -3.029354 | 2.398347e-28 | 1.463409e-25 |
| 14032 | JUN | -11.407815 | -2.281609 | 3.822108e-30 | 2.438157e-27 |
| 14033 | KLF6 | -12.175760 | -2.437144 | 4.184774e-34 | 3.262729e-31 |
14034 rows × 5 columns
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
warnings.filterwarnings("ignore", category=RuntimeWarning)
cl_deseq_result14 = perform_deseq_analysis(cell_subset14, "/home/jana/BAL_GEX/cl_deseq_result14.csv", top_n_genes=10)
Fitting size factors... ... done in 0.02 seconds. Fitting dispersions... ... done in 6.25 seconds. Fitting dispersion trend curve... ... done in 4.24 seconds. Fitting MAP dispersions... ... done in 5.36 seconds. Fitting LFCs... ... done in 5.93 seconds. Refitting 1 outliers. Fitting dispersions... ... done in 0.01 seconds. Fitting MAP dispersions... ... done in 0.01 seconds. Fitting LFCs... ... done in 0.01 seconds. Running Wald tests... ... done in 7.39 seconds.
Log2 fold change & Wald test p-value: type Sarcoidosis vs Healthy
baseMean log2FoldChange lfcSE stat pvalue padj
LINC00115 1.344470 -3.636534 2.399276 -1.515680 0.129600 NaN
FAM41C 0.137531 0.054519 3.810656 0.014307 0.988585 NaN
NOC2L 17.719414 -0.578118 0.333765 -1.732113 0.083253 0.393822
KLHL17 0.743128 1.591720 1.772124 0.898199 0.369080 NaN
PLEKHN1 1.525053 1.180825 1.092627 1.080721 0.279821 NaN
... ... ... ... ... ... ...
AC011043.1 0.070977 -0.907272 4.044377 -0.224329 0.822501 NaN
AL354822.1 0.549028 -0.428416 2.094891 -0.204505 0.837959 NaN
AL592183.1 0.856244 -2.995857 2.500416 -1.198143 0.230861 NaN
AC240274.1 0.257536 -1.658445 4.003772 -0.414221 0.678713 NaN
AC007325.4 0.612166 0.743767 1.900554 0.391342 0.695544 NaN
[14034 rows x 6 columns]
None
cl_deseq_result14
| baseMean | log2FoldChange | lfcSE | stat | pvalue | padj | |
|---|---|---|---|---|---|---|
| EEF1G | 38.748691 | 6.061090 | 0.357212 | 16.967764 | 1.422456e-64 | 1.055747e-60 |
| RNASEK | 47.158335 | 3.664704 | 0.222727 | 16.453788 | 7.878255e-61 | 2.923621e-57 |
| AL627171.2 | 17.661012 | 7.050460 | 0.676433 | 10.422995 | 1.947227e-25 | 4.817440e-22 |
| GABARAP | 33.627991 | 3.362764 | 0.325566 | 10.328974 | 5.211478e-25 | 9.669898e-22 |
| UBE2V1 | 19.326707 | 3.264839 | 0.317673 | 10.277343 | 8.915117e-25 | 1.323360e-21 |
| ... | ... | ... | ... | ... | ... | ... |
| TMEM255A | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| LINC02243 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| AC244197.2 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| PNMA5 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
| SLC6A8 | 0.000000 | NaN | NaN | NaN | NaN | NaN |
14034 rows × 6 columns